Helper Functions

# Clear the workspace, load the processed match data, and derive YearSeason
# (from Charles): "01/01/<Year>" for Spring rows, "01/06/<Year>" for all others.
rm(list = ls())
matches <- read.csv("/Users/pierlim/R_Projects/processed/matches.csv")
matches$YearSeason <- ifelse(
  matches$Season == "Spring",
  paste0("01/01/", as.character(matches$Year)),
  paste0("01/06/", as.character(matches$Year))
)

Get the team efficiency (matches won / total matches played)

library(data.table)
data.table 1.10.4.3
**********
This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode. If this is a Mac, please ensure you are using R>=3.4.0 and have installed the MacOS binary package from CRAN: see ?install.packages, the 'type=' argument and the 'Binary packages' section. If you compiled from source, please reinstall and precisely follow the installation instructions on the data.table homepage. This warning message should not occur on Windows or Linux. If it does and you've followed the installation instructions on the data.table homepage, please file a GitHub issue.
**********
  The fastest way to learn (by data.table authors): https://www.datacamp.com/courses/data-analysis-the-data-table-way
  Documentation: ?data.table, example(data.table) and browseVignettes("data.table")
  Release notes, videos and slides: http://r-datatable.com
library(ggplot2)
# Total matches played by each team, counting games on both the blue and the
# red side.
blueTeams <- data.frame(table(matches$blueTeamTag))
redTeams <- data.frame(table(matches$redTeamTag))
names(blueTeams)[names(blueTeams) == "Var1"] <- "Team"
names(redTeams)[names(redTeams) == "Var1"] <- "Team"
# Full outer join so teams that appear on only one side are kept. merge()
# suffixes the clashing Freq columns with .x (blue) and .y (red).
combinedPlayed <- merge(blueTeams, redTeams, by = "Team", all = TRUE)
setnames(
  combinedPlayed,
  c("Freq.x", "Freq.y"),
  c("bMatchesPlayed", "rMatchesPlayed")
)
# A team absent from one side comes out of the outer join as NA on that side.
# Count it as zero games; otherwise the sum below is NA and such a team would
# end up with zero games played once NAs are replaced by 0 further down.
combinedPlayed$bMatchesPlayed[is.na(combinedPlayed$bMatchesPlayed)] <- 0L
combinedPlayed$rMatchesPlayed[is.na(combinedPlayed$rMatchesPlayed)] <- 0L
combinedPlayed$totalPlayed <-
  combinedPlayed$bMatchesPlayed + combinedPlayed$rMatchesPlayed
# Wins per team, whether they won as blue or as red.
bluewin <- subset(matches, matches$bResult == 1)
redwin <- subset(matches, matches$rResult == 1)
blueTeamWin <- data.frame(table(bluewin$blueTeamTag))
redTeamWin <- data.frame(table(redwin$redTeamTag))
names(blueTeamWin)[names(blueTeamWin) == "Var1"] <- "Team"
names(redTeamWin)[names(redTeamWin) == "Var1"] <- "Team"
# Full outer join so a team that only ever won on one side is kept. merge()
# suffixes the clashing Freq columns with .x (blue) and .y (red).
combinedWins <- merge(blueTeamWin, redTeamWin, by = "Team", all = TRUE)
setnames(
  combinedWins,
  c("Freq.x", "Freq.y"),
  c("bMatchesWon", "rMatchesWon")
)
# A team with no wins on one side comes out of the outer join as NA on that
# side. Count it as zero wins; otherwise the sum below is NA and the team's
# wins on the other side would be lost.
combinedWins$bMatchesWon[is.na(combinedWins$bMatchesWon)] <- 0L
combinedWins$rMatchesWon[is.na(combinedWins$rMatchesWon)] <- 0L
combinedWins$totalWon <- combinedWins$bMatchesWon + combinedWins$rMatchesWon
# Merge wins with games played and compute each team's win efficiency
# (total wins / total games played).
# all.y = TRUE keeps every team in combinedPlayed even when it has no row in
# combinedWins; a plain inner join would silently drop such teams. Column
# order is the same as for the inner join.
combinedTeams <- merge(
  combinedWins,
  combinedPlayed,
  by = "Team",
  all.y = TRUE
)
combinedTeams$winEfficiency <-
  combinedTeams$totalWon / combinedTeams$totalPlayed
# Any remaining NA/NaN (missing win counts, 0 / 0) is treated as 0.
combinedTeams[is.na(combinedTeams)] <- 0
# Plot the 20 teams with the highest win efficiency (visualisation only).
# head() returns at most 20 rows, so a data set with fewer than 20 teams does
# not get padded with all-NA rows the way `[1:20, ]` would.
top20efficient <- head(
  combinedTeams[order(-combinedTeams$winEfficiency), ],
  20
)
ggplot(top20efficient, aes(x = Team, y = winEfficiency)) +
  geom_bar(stat = "identity") +
  ggtitle("Top 20 Teams Most Efficient At Winning") +
  theme(plot.title = element_text(hjust = 0.5))

Personally, I don’t feel this is an extremely accurate value, as it could be that a team played only 10 games and won them all, e.g. SSW, who played 17 games and won 15. Contrast that with SKT, who played many games and naturally did not win them all. As such, I don’t think it’s necessary to group the win efficiency by season.

That said, it is good enough to be used as a predictor, as SKT, the undisputed champion, is among the top 10 in win efficiency.

Put this new derived value back into processed -> matches for both blueTeamTag and redTeamTag.

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKSGVscGVyIEZ1bmN0aW9ucwpgYGB7cn0KCmBgYAoKCmBgYHtyfQojIFJlYWQgQW51cmFnJ3MgcHJvY2Vzc2VkIGRhdGEsIGFuZCBjcmVhdGUgWWVhclNlYXNvbiB2YXJpYWJsZSAoZnJvbSBDaGFybGVzKQpybShsaXN0PWxzKCkpCm1hdGNoZXMgPSByZWFkLmNzdignL1VzZXJzL3BpZXJsaW0vUl9Qcm9qZWN0cy9wcm9jZXNzZWQvbWF0Y2hlcy5jc3YnKQptYXRjaGVzJFllYXJTZWFzb24gPC0gaWZlbHNlKG1hdGNoZXMkU2Vhc29uID09ICJTcHJpbmciLCBwYXN0ZSgiMDEvMDEvIiwgYXMuY2hhcmFjdGVyKG1hdGNoZXMkWWVhciksIHNlcD0iIiksIHBhc3RlKCIwMS8wNi8iLCBhcy5jaGFyYWN0ZXIobWF0Y2hlcyRZZWFyKSwgc2VwPSIiKSkKCgpgYGAKCkdldCB0aGUgdGVhbSBlZmZpY2llbmN5IChtYXRjaGVzIHdvbiAvIHRvdGFsIG1hdGNoZXMgcGxheWVkKQoKYGBge3J9CmxpYnJhcnkoZGF0YS50YWJsZSkKbGlicmFyeShnZ3Bsb3QyKQojIFRvdGFsIG1hdGNoZXMgcGxheWVkIGJ5IGVhY2ggdGVhbSAoY29uc2lkZXIgdGhlbSBhcyBibHVlIG9yIHJlZCkKYmx1ZVRlYW1zID0gZGF0YS5mcmFtZSh0YWJsZShtYXRjaGVzJGJsdWVUZWFtVGFnKSkKcmVkVGVhbXMgPSBkYXRhLmZyYW1lKHRhYmxlKG1hdGNoZXMkcmVkVGVhbVRhZykpCm5hbWVzKGJsdWVUZWFtcylbbmFtZXMoYmx1ZVRlYW1zKSA9PSAnVmFyMSddIDwtICdUZWFtJwpuYW1lcyhyZWRUZWFtcylbbmFtZXMocmVkVGVhbXMpID09ICdWYXIxJ10gPC0gJ1RlYW0nCmNvbWJpbmVkUGxheWVkIDwtIG1lcmdlKGJsdWVUZWFtcywgcmVkVGVhbXMsIGJ5PSJUZWFtIiwgYWxsPVRSVUUpCnNldG5hbWVzKGNvbWJpbmVkUGxheWVkLCAiRnJlcS55IiwgInJNYXRjaGVzUGxheWVkIikKc2V0bmFtZXMoY29tYmluZWRQbGF5ZWQsICJGcmVxLngiLCAiYk1hdGNoZXNQbGF5ZWQiKQpjb21iaW5lZFBsYXllZCR0b3RhbFBsYXllZCA8LSBjb21iaW5lZFBsYXllZCRiTWF0Y2hlc1BsYXllZCArIGNvbWJpbmVkUGxheWVkJHJNYXRjaGVzUGxheWVkCgojIEdldCBlYWNoIHRlYW0gd2lucywgd2hldGhlciB0aGV5IHdvbiBhcyBibHVlIG9yIHJlZApibHVld2luIDwtIHN1YnNldChtYXRjaGVzLCBtYXRjaGVzJGJSZXN1bHQ9PTEpCnJlZHdpbiA8LSBzdWJzZXQobWF0Y2hlcywgbWF0Y2hlcyRyUmVzdWx0PT0xKQpibHVlVGVhbVdpbiA9IGRhdGEuZnJhbWUodGFibGUoYmx1ZXdpbiRibHVlVGVhbVRhZykpCnJlZFRlYW1XaW4gPSBkYXRhLmZyYW1lKHRhYmxlKHJlZHdpbiRyZWRUZWFtVGFnKSkKbmFtZXMoYmx1ZVRlYW1XaW4pW25hbWVzKGJsdWVUZWFtV2luKSA9PSAnVmFyMSddIDwtICdUZWFtJwpuYW1lcyhyZWRUZWFtV2luKVtuYW1lcyhyZWRUZWFtV2luKSA9PSAnVmFyMSddIDwtICdUZWFtJwpjb21iaW5lZFdpbnMgPC0gbWVyZ2UoYmx1ZVRlYW1XaW4sIHJlZFRlYW1XaW4sIGJ5PSJUZWFtIiwgYWxsPVRSVUUp
CnNldG5hbWVzKGNvbWJpbmVkV2lucywgIkZyZXEueSIsICJyTWF0Y2hlc1dvbiIpCnNldG5hbWVzKGNvbWJpbmVkV2lucywgIkZyZXEueCIsICJiTWF0Y2hlc1dvbiIpCmNvbWJpbmVkV2lucyR0b3RhbFdvbiA8LSBjb21iaW5lZFdpbnMkYk1hdGNoZXNXb24gKyBjb21iaW5lZFdpbnMkck1hdGNoZXNXb24KCiMgTWVyZ2UgYW5kIGNhbGMgdGhlIHdpbiBlZmZpY2llbmN5IG9mIGVhY2ggdGVhbQpjb21iaW5lZFRlYW1zIDwtIG1lcmdlKGNvbWJpbmVkV2lucywgY29tYmluZWRQbGF5ZWQsIGJ5PSJUZWFtIikKY29tYmluZWRUZWFtcyR3aW5FZmZpY2llbmN5ID0gY29tYmluZWRUZWFtcyR0b3RhbFdvbiAvIGNvbWJpbmVkVGVhbXMkdG90YWxQbGF5ZWQKY29tYmluZWRUZWFtc1tpcy5uYShjb21iaW5lZFRlYW1zKV0gPC0gMCAKCiMgUGxvdHRpbmcgdG9wIDIwIGp1c3QgZm9yIHZpc3VhbGl6YXRpb24KdG9wMjBlZmZpY2llbnQgPC0gKGNvbWJpbmVkVGVhbXNbd2l0aChjb21iaW5lZFRlYW1zLCBvcmRlcigtY29tYmluZWRUZWFtcyR3aW5FZmZpY2llbmN5KSksIF0pWzE6MjAsIF0KZ2dwbG90KHRvcDIwZWZmaWNpZW50LCBhZXMoeD1UZWFtLCB5PXdpbkVmZmljaWVuY3kpKSArIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikgK2dndGl0bGUoIlRvcCAyMCBUZWFtcyBNb3N0IEVmZmljaWVudCBBdCBXaW5uaW5nIikgKyB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gMC41KSkKYGBgCgpQZXJzb25hbGx5LCBJIGRvbid0IGZlZWwgdGhpcyBpcyBhbiBleHRyZW1lbHkgYWNjdXJhdGUgdmFsdWUgYXMgaXQgY291bGQgYmUgdGhhdCB0aGUgdGVhbSBwbGF5ZWQgMTAgZ2FtZXMgYW5kIHdvbiB0aGVtIGFsbC4gRWcgU1NXIHdobyBwbGF5ZWQgMTcgZ2FtZXMgYW5kIHdvbiAxNS4gQ29udHJhc3QgdGhhdCB0byBTS1QsIHdobyBwbGF5ZWQgbWFueSBnYW1lcywgYW5kIG5hdHVyYWxseSBkaWQgbm90IHdpbiB0aGVtIGFsbC4gQXMgc3VjaCwgSSBkb24ndCB0aGluayBpdCdzIG5lY2Vzc2FyeSB0byBncm91cCB0aGUgd2luIGVmZmljaWVuY3kgYnkgc2Vhc29uLiAKClRoYXQgc2FpZCwgaXQgaXMgZ29vZCBlbm91Z2ggdG8gYmUgdXNlZCBhcyBhIHByZWRpY3RvciBhcyBTS1QsIHRoZSB1bmRpc3B1dGVkIGNoYW1waW9uIGlzIGFtb25nIHRoZSB0b3AgMTAgaW4gd2luIGVmZmljaWVuY3kuIAoKUHV0IHRoaXMgbmV3IGRlcml2ZWQgdmFsdWUgYmFjayBpbnRvIHByb2Nlc3NlZCAtPiBtYXRjaGVzIGZvciBib3RoIGJsdWVUZWFtVGFnIGFuZCByZWRUZWFtVGFnLgoKYGBge3J9CmNvbWJpbmVkVGVhbXNfYmx1ZSA9IGNvbWJpbmVkVGVhbXNbLC1jKDI6NyldICMgcmVtb3ZlIHVuZWNlc3NhcnkgY29sdW1ucwpzZXRuYW1lcyhjb21iaW5lZFRlYW1zX2JsdWUsICJ3aW5FZmZpY2llbmN5IiwgImJXaW5FZmZpY2llbmN5IikKc2V0bmFtZXMoY29tYmluZWRUZWFtc19ibHVlLCAiVGVhbSIsICJibHVlVGVhbVRhZyIpCm1hdGNoZXMgPC0gbWVyZ2Uo
Y29tYmluZWRUZWFtc19ibHVlLCBtYXRjaGVzLCBieT0iYmx1ZVRlYW1UYWciKQoKY29tYmluZWRUZWFtc19yZWQgPSBjb21iaW5lZFRlYW1zWywtYygyOjcpXSAjIHJlbW92ZSB1bmVjZXNzYXJ5IGNvbHVtbnMKc2V0bmFtZXMoY29tYmluZWRUZWFtc19yZWQsICJ3aW5FZmZpY2llbmN5IiwgInJXaW5FZmZpY2llbmN5IikKc2V0bmFtZXMoY29tYmluZWRUZWFtc19yZWQsICJUZWFtIiwgInJlZFRlYW1UYWciKQptYXRjaGVzIDwtIG1lcmdlKGNvbWJpbmVkVGVhbXNfcmVkLCBtYXRjaGVzLCBieT0icmVkVGVhbVRhZyIpCgptb3ZldG9sYXN0IDwtIGZ1bmN0aW9uKGRhdGEsIG1vdmUpIHsKICBkYXRhW2Moc2V0ZGlmZihuYW1lcyhkYXRhKSwgbW92ZSksIG1vdmUpXQp9Cgptb3ZldG9sYXN0KG1hdGNoZXMsIGMoImJXaW5FZmZpY2llbmN5IiwgInJXaW5FZmZpY2llbmN5IikpCm1hdGNoZXMgPC0gKG1hdGNoZXNbd2l0aChtYXRjaGVzLCBvcmRlcigtbWF0Y2hlcyRMZWFndWUpKSwgXSkKaGVhZChtYXRjaGVzKQojd3JpdGUuY3N2KG1hdGNoZXMsIGZpbGUgPSAibWF0Y2hlc19wcm9jZXNzZWQuY3N2IikKCgpgYGAKCg==